import numpy as np #linear algebra
import pandas as pd # data processing,CSV file I/O(e.g pd.read_csv)
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
# Load the iris dataset through seaborn and display the resulting DataFrame.
data = sns.load_dataset("iris")
data
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
| ... | ... | ... | ... | ... | ... |
| 145 | 6.7 | 3.0 | 5.2 | 2.3 | virginica |
| 146 | 6.3 | 2.5 | 5.0 | 1.9 | virginica |
| 147 | 6.5 | 3.0 | 5.2 | 2.0 | virginica |
| 148 | 6.2 | 3.4 | 5.4 | 2.3 | virginica |
| 149 | 5.9 | 3.0 | 5.1 | 1.8 | virginica |
150 rows × 5 columns
# Preview the first rows of the frame.
data.head()
| | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 | setosa |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 | setosa |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 | setosa |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 | setosa |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 | setosa |
# (number of rows, number of columns) of the frame.
data.shape
(150, 5)
# Count rows that are exact duplicates of an earlier row.
data.duplicated().sum()
1
# Discard duplicated rows, then re-count to confirm that none remain.
data = data.drop_duplicates()
data.duplicated().sum()
0
# Print the index range, per-column non-null counts and dtypes.
data.info()
<class 'pandas.core.frame.DataFrame'>
Int64Index: 149 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype
---  ------        --------------  -----
 0   sepal_length  149 non-null    float64
 1   sepal_width   149 non-null    float64
 2   petal_length  149 non-null    float64
 3   petal_width   149 non-null    float64
 4   species       149 non-null    object
dtypes: float64(4), object(1)
memory usage: 7.0+ KB
# List the column labels of the frame.
data.columns
Index(['sepal_length', 'sepal_width', 'petal_length', 'petal_width',
'species'],
dtype='object')
# VISUALIZATION

# Matplotlib bar chart: sepal width drawn against species.
plt.bar(x=data['species'], height=data['sepal_width'])
plt.xticks(rotation=90)
plt.show()

# Plotly bar chart: sepal length against sepal width, coloured by sepal width.
fig = px.bar(
    data_frame=data,
    x='sepal_width',
    y='sepal_length',
    color='sepal_width',
)
fig.show()

# Plotly violin plot: petal width against petal length, coloured by petal length.
fig = px.violin(
    data_frame=data,
    x='petal_length',
    y='petal_width',
    color='petal_length',
)
fig.show()
# Frequency of every distinct petal_length value.
plt.figure(figsize=(10, 4))
sns.countplot(data=data, x='petal_length', color='b')
plt.title('petal_length')
plt.show()

# Horizontal count plot restricted to the ten most frequent petal_width values.
plt.figure(figsize=(10, 4))
petal_width_counts = data['petal_width'].value_counts()
top_petal_widths = petal_width_counts.nlargest(10)
sns.countplot(y=data['petal_width'], order=top_petal_widths.index, color='red')
<AxesSubplot:xlabel='count', ylabel='petal_width'>
# Line plot with sepal_length on the x axis and species on the y axis.
sns.lineplot(data=data, x='sepal_length', y='species')
<AxesSubplot:xlabel='sepal_length', ylabel='species'>
# Bar plot of sepal_length (bar height) for each petal_length value.
# x and y are passed by keyword: seaborn deprecated positional data vectors
# (FutureWarning in 0.11) and from 0.12 only `data` may be positional.
sns.barplot(x=data['petal_length'], y=data['sepal_length'], color='r')
plt.xticks(rotation=90)
plt.show()
D:\anaconda files\lib\site-packages\seaborn\_decorators.py:36: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.
# Distribution plot of the species column.
sns.displot(data["species"])
<seaborn.axisgrid.FacetGrid at 0x19415af7580>
# Frequency of every distinct sepal_width value, with vertical tick labels.
sns.countplot(data=data, x='sepal_width')
plt.xticks(rotation=90)
(array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22]),
[Text(0, 0, '2.0'),
Text(1, 0, '2.2'),
Text(2, 0, '2.3'),
Text(3, 0, '2.4'),
Text(4, 0, '2.5'),
Text(5, 0, '2.6'),
Text(6, 0, '2.7'),
Text(7, 0, '2.8'),
Text(8, 0, '2.9'),
Text(9, 0, '3.0'),
Text(10, 0, '3.1'),
Text(11, 0, '3.2'),
Text(12, 0, '3.3'),
Text(13, 0, '3.4'),
Text(14, 0, '3.5'),
Text(15, 0, '3.6'),
Text(16, 0, '3.7'),
Text(17, 0, '3.8'),
Text(18, 0, '3.9'),
Text(19, 0, '4.0'),
Text(20, 0, '4.1'),
Text(21, 0, '4.2'),
Text(22, 0, '4.4')])
# Box plot of petal_width grouped by sepal_width value.
sns.boxplot(data=data, x='sepal_width', y='petal_width')
<AxesSubplot:xlabel='sepal_width', ylabel='petal_width'>
# MODEL BUILDING

# Feature matrix: the four measurement columns, species excluded.
feature_columns = ['sepal_length', 'sepal_width', 'petal_length', 'petal_width']
X = data[feature_columns]
X.head()
| | sepal_length | sepal_width | petal_length | petal_width |
|---|---|---|---|---|
| 0 | 5.1 | 3.5 | 1.4 | 0.2 |
| 1 | 4.9 | 3.0 | 1.4 | 0.2 |
| 2 | 4.7 | 3.2 | 1.3 | 0.2 |
| 3 | 4.6 | 3.1 | 1.5 | 0.2 |
| 4 | 5.0 | 3.6 | 1.4 | 0.2 |
# Target vector: the species label of every row.
y = data.loc[:, 'species']
y.head()
0    setosa
1    setosa
2    setosa
3    setosa
4    setosa
Name: species, dtype: object
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Encode the species strings as integer class labels.
le = LabelEncoder()
y = le.fit_transform(y)

# Hold out 30% of the rows for evaluation. The split is seeded so that the
# accuracy printed below is reproducible from run to run; without a seed the
# partition (and therefore the score) changes on every execution even though
# the classifier itself is seeded.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0
)

# Fit an AdaBoost ensemble of 50 estimators and predict the held-out rows.
abc = AdaBoostClassifier(n_estimators=50, learning_rate=1, random_state=0)
model = abc.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Fraction of held-out rows whose predicted label equals the true label.
print("AdaBoost Classifier Model Accuracy:", accuracy_score(y_test, y_pred))
AdaBoost Classifier Model Accuracy: 0.9555555555555556
# Write the de-duplicated frame (index included) to a CSV file in the
# current working directory.
data.to_csv(path_or_buf="adaboost 1.csv")